Note: Open scripts.Rproj first, then script. To easily use relative paths, click the down button next to knit and then click “Knit Directory –> Project Directory”. This should make loading and saving files much easier.

GO Semantic Similarity

Perform GO Semantic Similarity analysis to gain additional insights into our DGE enriched GO terms, using results from DGE analysis (i.e., after running the 1a-DGE.Rmd script). Red text indicates regions that require the user to modify. Regardless, the user should check over all code blocks to ensure that everything is running correctly.

1. Load packages

Load packages

library(tidyverse)
library(org.Hs.eg.db)
library(simplifyEnrichment)
library(magick)

2. Set variables

Change file names and conditions where appropriate.

# Input unfiltered data
GO_term.file <- "salmon.numreads.DGE_results.GOsig.tsv"

3. Load, clean, and pre-processing datasets

Load GO data and select only the GO_IDs associated with BP, MF, and CC terms.

GO <- read.csv(GO_term.file, header=TRUE, sep="\t")

# Print stats
print(paste("Number sig BP terms: ", nrow(filter(GO, ontology=="BP")), sep=''))
## [1] "Number sig BP terms: 53"
print(paste("Number sig MF terms: ", nrow(filter(GO, ontology=="MF")), sep=''))
## [1] "Number sig MF terms: 10"
print(paste("Number sig CC terms: ", nrow(filter(GO, ontology=="CC")), sep=''))
## [1] "Number sig CC terms: 13"
# Extract BP, MF, and CC separatly
GO.BP <- GO %>% filter(ontology == "BP")
BP <- GO.BP$category
GO.MF <- GO %>% filter(ontology == "MF")
MF <- GO.MF$category
GO.CC <- GO %>% filter(ontology == "CC")
CC <- GO.CC$category

4. Calculate a similarity matrix and save the output

simplifyGO(GO_similarity(BP, ont = "BP", db = "org.Hs.eg.db"),
           word_cloud_grob_param = list(max_width = 50),
           max_words=20
          )
## Cluster 53 terms by 'binary_cut'... 22 clusters, used 0.04145217 secs.
## Perform keywords enrichment for 22 GO lists...

simplifyGO(GO_similarity(MF, ont = "MF", db = "org.Hs.eg.db"),
           word_cloud_grob_param = list(max_width = 50),
           max_words=20
          )
## Cluster 10 terms by 'binary_cut'... 6 clusters, used 0.004768848 secs.
## Perform keywords enrichment for 6 GO lists...

simplifyGO(GO_similarity(CC, ont = "CC", db = "org.Hs.eg.db"),
           word_cloud_grob_param = list(max_width = 50),
           max_words=20
          )
## Cluster 13 terms by 'binary_cut'... 10 clusters, used 0.006582022 secs.
## Perform keywords enrichment for 10 GO lists...

5. Plot term significance values

GO.BP %>%
  mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
  ggplot(aes(x=term, y=over_represented_pvalue) ) +
      geom_segment( aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
      geom_point(size=3, color="#69b3a2") +
      coord_flip() +
      theme(
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position="none"
      ) +
  xlab("") +
  ylab("over_represented_pvalue") +
  ggtitle("Adult") + #add a main title
  theme(plot.title = element_text(face = 'bold',
                                  size = 12,
                                  hjust = 0)) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank(),#Set the plot background
        legend.position="none")

GO.MF %>%
  mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
  ggplot(aes(x=term, y=over_represented_pvalue) ) +
      geom_segment( aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
      geom_point(size=3, color="#69b3a2") +
      coord_flip() +
      theme(
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position="none"
      ) +
  xlab("") +
  ylab("over_represented_pvalue") +
  ggtitle("Adult") + #add a main title
  theme(plot.title = element_text(face = 'bold',
                                  size = 12,
                                  hjust = 0)) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank(),#Set the plot background
        legend.position="none")

GO.CC %>%
  mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
  ggplot(aes(x=term, y=over_represented_pvalue) ) +
      geom_segment( aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
      geom_point(size=3, color="#69b3a2") +
      coord_flip() +
      theme(
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position="none"
      ) +
  xlab("") +
  ylab("over_represented_pvalue") +
  ggtitle("Adult") + #add a main title
  theme(plot.title = element_text(face = 'bold',
                                  size = 12,
                                  hjust = 0)) +
  theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     panel.grid.major = element_blank(), #Set major gridlines
                     panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank(),#Set the plot background
        legend.position="none")